* This do-file constructs the dataset of mentors for grade 12 students (status: DONE).

* Close any log that is still open (ignore the error if none is), then start a new dated log
capture log close
log using "$logfile/cleaning_grade12_mentor_$date.log", replace

* standardization: z-score a variable after treating the code 99 as missing.
*   Syntax : standardization varname
*   Effect : recodes 99 to missing in varname (in place) and creates
*            zm_varname = (varname - mean)/sd, with mean and sd computed over
*            all non-missing observations currently in memory.
cap program drop standardization 
program standardization, rclass 
	args i
	* Drop a previous result so the program can be re-run on the same data
	* (the output variable is zm_`i', not z_`i')
	cap drop zm_`i'
	recode `i' (99=.)
	* Temporary variables: no clash with existing mean`i'/sd`i' variables,
	* and they are removed automatically when the program exits
	tempvar mu sigma
	egen `mu' = mean(`i')
	egen `sigma' = sd(`i')
	g zm_`i' = (`i' - `mu')/`sigma'
end


* Load the raw mentor survey export; the first row holds the variable names
import delimited "$raw/mentor/20210918122002-SurveyExport-TVET.csv", varnames(1) encoding(UTF-8) clear

* Identity and contact variables: give the auto-generated names short names
rename (whatisyourname whatisyourgender whatisyourmobilephonenumber) ///
       (name_m mentor_sex mentor_phone)
rename (whatisyouremailaddresswritenaify whichtvetinstitutionareyoustudyi) ///
       (mentor_email mentor_ttis)

* Code 2 becomes 0 so the variable is a 0/1 indicator (renamed mentor_male before the final save)
recode mentor_sex (2 = 0)

* Phone is used as a string merge key further down
tostring mentor_phone, replace

* Lower-case the e-mail address and collapse/trim blanks
replace mentor_email = trim(itrim(lower(mentor_email)))

* Value labels for the training institute
label define mentor_ttis 1 "TTI Chumey" 2 "TTI Khuruthang" 3 "TTI Rangjung" 4 "TTI Samthang" 5 "TTI Thimphu" 6 "Jigme Wangchuk Power" 7 "NIZC Thimphu" 8 "NIZC Trashi Yangste" 9 "Royal Institute of Tourism and Hospitality" 77 "Other" 99 "N/A", modify
label values mentor_ttis mentor_ttis

* Date of birth (day / month / year)
rename (dateofbirthdaywhatisyourdateofbi dateofbirthmonthwhatisyourdateof dateofbirthyearwhatisyourdateofb) ///
       (mentor_bday mentor_bmonth mentor_byear)

* District (dzongkhag) of origin and of current residence
rename (whichdzongkhagareyouoriginallyfr whichdzongkhagareyoucurrentlyliv) ///
       (mentor_dist_origin mentor_dist_current)

* Point at which the mentor entered TVET
rename whichofthefollowingwastrueabouty mentor_tvet_entrance
label define mentor_tvet_entrance 1 "after grade 12" 2 "during high school" 3 "after grade 10"
label values mentor_tvet_entrance mentor_tvet_entrance
label variable mentor_tvet_entrance "when did the mentor enter tvet"

* Last school attended: the source question depends on the entry route
* (codes 1/2 use one school question, code 3 uses v18); an "Other (Specify)"
* answer is replaced by the free-text school name for every route.
capture generate mentor_school = ""
replace mentor_school = whichsecondaryschooldidyougradua if inlist(mentor_tvet_entrance, 1, 2)
replace mentor_school = v18 if mentor_tvet_entrance == 3
replace mentor_school = pleasespecifythenameofyoursecond if mentor_school == "Other (Specify)" & inlist(mentor_tvet_entrance, 1, 2, 3)
label variable mentor_school "last school that mentor attended"

* Academic stream in high school
* (cap: the rename is skipped silently if the source variable is absent)
cap rename whichacademicstreamdidyoustudyin mentor_astream
lab def mentor_astream 1 "Arts" 2 "Commerce" 3 "Science" 4 "Rigzhung"
* Attach the label defined on the previous line; the original attached the
* undefined name "mentor_stream", so the codes were displayed without text
lab val mentor_astream mentor_astream
lab var mentor_astream "academic stream at high school"

* Year of (expected) graduation, in bands; 99 = declined to answer
rename inwhichyearyougraduatedorexpectt mentor_graduation 

lab def mentor_graduation 1 "2015 or before" 2 "2016-2020" 3 "2021 or later" 99 "decline"
lab val mentor_graduation mentor_graduation

* TVET level achieved or expected
rename whatleveloftvetyouachievedorexpe mentor_level 

lab def mentor_level 1 "NC II" 2 "NC III" 3 "National Diploma" 4 "Diploma" 5 "RPL Certification" 
lab val mentor_level mentor_level

* Satisfaction with, and recommendation of, the current study programme
rename (areyousatisfiedwiththetrainingyo howmuchdoyourecommendtvetprogram) ///
       (mentor_progsat mentor_progrec)

* Perception of science & technology (two items)
rename (scienceandtechnologyaremakingour allthingsconsideredscienceandtec) (b2a b2d)

// need to do standardization for the sample of chosen mentors only!!!
* Mean of the two items; an item coded 99 is left out of both the sum and the count
generate attitude_stem = (b2a*(b2a != 99) + b2d*(b2d != 99)) / ((b2a != 99) + (b2d != 99))
standardization attitude_stem
label variable zm_attitude_stem "Mentor: standardized attitude to science and technology (baseline)"
* Only the standardized version is kept
capture drop attitude_stem

* Gender bias in STEM: six items
* NOTE(review): the original author tagged every item below as "reverse", but
* no reverse-coding is applied when the index is built - confirm the item scale.
rename (scienceeducationismoreimportantf onaveragemenhavehigherabilityins ifwomenhavestemmsciencetechnolog) ///
       (b4a b4b b4c)
rename (stemmrelatedjobsareformenthanwom whenjobsarescarcemenshouldhavemo whenamotherworksforpaythechildre) ///
       (b4d b5b b5c)

// need to do standardization for the sample of chosen mentors only!!!
* Build "sum of answered items" and "number of answered items" (99 = not answered)
local answered_sum 0
local answered_cnt 0
foreach item in b4a b4b b4c b4d b5b b5c {
	local answered_sum `answered_sum' + `item'*(`item' != 99)
	local answered_cnt `answered_cnt' + (`item' != 99)
}
generate genderbias = (`answered_sum')/(`answered_cnt')
standardization genderbias
label variable zm_genderbias "mentor: gender bias (baseline): higher value means more biased against females in STEM"

* Own experience as a mentee, satisfaction with it, and number of persons mentored
rename (haveyoueverhadamentorofyourown ifyouhavebeenamenteewereyousatis approximatelyhowmanypersonsmente) ///
       (mentor_beenmentee mentor_beenmentee_sat mentor_exp)

* Code 2 becomes 0 so the variable is a 0/1 indicator
recode mentor_beenmentee (2 = 0)

* Big Five personality items (15 items, p1a-p1o)
rename isreservedhowmuchdoyouagreeordis p1a
rename tendstobelazyhowmuchdoyouagreeor p1b
rename isoutgoingsociablehowmuchdoyouag p1c
rename doesathoroughjobhowmuchdoyouagre p1d
rename getsnervouseasilyhowmuchdoyouagr p1e
rename hasanactiveimaginationhowmuchdoy p1f 
rename worriesalothowmuchdoyouagreeordi p1g
rename remainscalmintensesituationshowm p1h
rename istalkativehowmuchdoyouagreeordi p1i
rename isoriginalcomesupwithnewideashow p1j
rename valuesartisticaestheticexperienc p1k
rename issometimesrudetoothershowmuchdo p1l
rename hasaforgivingnaturehowmuchdoyoua p1m
rename isconsiderateandkindtoalmostever p1n
rename doesthingsefficientlyhowmuchdoyo p1o

* Each trait is the mean of its three items; an item coded 99 is left out of
* both the sum and the count. Negatively keyed items enter as (5 - item).
* NOTE(review): (5 - item) reverses a 1-4 scale; if the items are on a 1-5
* scale the reversal should be (6 - item) - confirm against the questionnaire.
g b5extro = ((5 - p1a)*(p1a~=99) + p1c*(p1c~=99) + p1i*(p1i~=99))/((p1a~=99) + (p1c~=99) + (p1i~=99))
g b5agree = ((5 - p1l)*(p1l~=99) + p1m*(p1m~=99) + p1n*(p1n~=99))/((p1l~=99) + (p1m~=99) + (p1n~=99))
g b5consci = (p1d*(p1d~=99) + (5 - p1b)*(p1b~=99) + p1o*(p1o~=99))/((p1d~=99) + (p1b~=99) + (p1o~=99))
g b5neuro = (p1e*(p1e~=99) + p1g*(p1g~=99) + (5 - p1h)*(p1h~=99))/((p1e~=99) + (p1g~=99) + (p1h~=99))
g b5open = (p1j*(p1j~=99) + p1k*(p1k~=99) + p1f*(p1f~=99))/((p1j~=99) + (p1k~=99) + (p1f~=99))

// need to do standardization for the sample of chosen mentors only!!!
foreach variable in b5extro b5agree b5consci b5neuro b5open {
	standardization `variable'
}
lab var zm_b5extro "mentor standardized: big 5 extroversion"
* Label text corrected: the trait is "agreeableness"
lab var zm_b5agree "mentor standardized: big 5 agreeableness"
lab var zm_b5consci "mentor standardized: big 5 conscientiousness"
lab var zm_b5neuro "mentor standardized: big 5 neuroticism"
lab var zm_b5open "mentor standardized: big 5 openness"

* Hobby items: rename to mentor_hobby1 ... mentor_hobby6, in questionnaire order
local hobby_no = 0
foreach rawname in listentomusichowmuchdoyoulikethe playmusichowmuchdoyoulikethefoll watchsportshowmuchdoyoulikethefo playsportshowmuchdoyoulikethefol watchtvhowmuchdoyoulikethefollow playvideoandonlinegameshowmuchdo {
	local ++hobby_no
	rename `rawname' mentor_hobby`hobby_no'
}
* Build a normalised name (mentor_name) that serves as a merge key
replace name_m = lower(name_m)
* Strip e-mail fragments typed into the name field
replace name_m = subinstr(subinstr(name_m, "@gemailcom", "", .), "@gmailcom", "", .)

generate mentor_name = name_m
replace mentor_name = trim(itrim(lower(mentor_name)))
* Remove, in this order: blank, comma, apostrophe
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, " ", "", .), ",", "", .), "'", "", .)
* ... dot, slash, dash
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, ".", "", .), "/", "", .), "-", "", .)
* ... equals sign, opening and closing parentheses
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, "=", "", .), "(", "", .), ")", "", .)
* ... and filler text that is not part of the name
replace mentor_name = subinstr(mentor_name, "mynameis", "", .)
replace mentor_name = subinstr(subinstr(mentor_name, "@gmailcom", "", .), "@gemailcom", "", .)
format mentor_name %25s

* Duplicate submissions (same normalised name and phone): keep the one with the
* lowest responseid, i.e. the row that sorts first within the name/phone pair
sort mentor_name mentor_phone responseid
drop if mentor_name == mentor_name[_n-1] & mentor_phone == mentor_phone[_n-1]

* Keep the mentor variables, the response id, the z-scores and the raw Big Five scores
keep mentor_* responseid zm_* b5*
save $temp/temp, replace

* Load the randomization main list, which carries the assigned mentorid
clear
import excel "$randomization/final/randomization_TVET_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow
* One row per mentorid
duplicates drop mentorid, force

* Normalise the name exactly as for the survey data so the two can be merged
replace mentor_name = trim(itrim(lower(mentor_name)))
* Remove, in this order: blank, comma, apostrophe
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, " ", "", .), ",", "", .), "'", "", .)
* ... dot, slash, dash
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, ".", "", .), "/", "", .), "-", "", .)
* ... equals sign, opening and closing parentheses
replace mentor_name = subinstr(subinstr(subinstr(mentor_name, "=", "", .), "(", "", .), ")", "", .)
* ... and filler text that is not part of the name
replace mentor_name = subinstr(mentor_name, "mynameis", "", .)
replace mentor_name = subinstr(subinstr(mentor_name, "@gmailcom", "", .), "@gemailcom", "", .)
format mentor_name %25s

* E-mail: normalise and correct one known typo in the domain
replace mentor_email = trim(itrim(lower(mentor_email)))
replace mentor_email = "tkamalsur548@gmail.com" if mentor_email == "tkamalsur548@gmaiol.com"

* The phone field is split into characters 1-8 and characters 10-17
* (presumably two numbers separated by one character - TODO confirm)
generate mentor_phone1 = substr(mentor_phone, 1, 8)
generate mentor_phone2 = substr(mentor_phone, 10, 8)
drop mentor_phone

* Suffix "r" marks values that come from the randomization list;
* the first phone number becomes the merge key mentor_phone
rename (mentor_tti mentor_phone1 mentor_email) (mentor_ttir mentor_phone mentor_emailr)

keep mentor_* mentorid
 
// Step 1: exact merge on phone and normalised name
// (master = randomization list, using = survey submissions)
merge 1:1 mentor_phone mentor_name using $temp/temp, keepusing(mentor_* responseid) 

* matched data 
preserve 
keep if _merge==3
drop _merge 
save $temp/temp1m, replace 
restore 

* unmatched data: mentors on the randomization list without an exact survey match
preserve 
keep if _merge==1 
drop _merge responseid 
rename mentor_name mentor_namer
save $temp/temp1r, replace 
restore 

* unmatched data: survey submissions without an exact match on the list
preserve 
keep if _merge==2 
* Also drop the list-side columns, which are all missing on these rows.
* temp1s is the MASTER of the next merge, and merge (without update) keeps the
* master's values on matches; left in place, these blank columns would stop
* the list's e-mail / institute / second phone from being brought in.
* NOTE(review): later passes now compare real list e-mails, so re-check that
* no mentor in the manual file (raw/manual) is also matched automatically.
drop _merge mentorid mentor_emailr mentor_ttir mentor_phone2
rename mentor_name mentor_names
rename mentor_email mentor_emails
save $temp/temp1s, replace 
restore 

// Step 2: merge the leftovers on phone only, then require a fuzzy name match
// (master = unmatched survey rows, using = unmatched randomization-list rows)
use $temp/temp1s, clear 
merge 1:m mentor_phone using $temp/temp1r, keepusing(mentor_phone* mentor_name* mentorid mentor_email* mentor_ttir) 
* matchit (user-written command) stores a name-similarity score in namescore
matchit mentor_names mentor_namer, g(namescore)

* matched data: same phone and sufficiently similar name
preserve 
keep if _merge==3 & namescore>0.5
drop _merge namescore 
save $temp/temp2m, replace 
restore 

* unmatched data, survey side: survey-only rows plus phone matches rejected on
* the name score. The original condition ~(_merge==3 & namescore>0.5) also let
* list-only rows (_merge==2) through, adding blank survey rows to this file.
preserve 
keep if _merge==1 | (_merge==3 & !(namescore>0.5))
drop _merge mentorid
rename mentor_phone mentor_phones
keep mentor_names mentor_emails responseid mentor_phones mentor_bday mentor_bmonth mentor_byear mentor_ttis 
save $temp/temp2s, replace 
restore 

* unmatched data, list side: list-only rows plus phone matches rejected on the
* name score (survey-only rows, _merge==1, are no longer included).
preserve 
keep if _merge==2 | (_merge==3 & !(namescore>0.5))
drop _merge responseid
rename mentor_phone mentor_phoner
keep mentor_namer mentor_emailr mentor_phoner mentorid mentor_ttir
save $temp/temp2r, replace 
restore

// Step 3: merge the leftovers on name, then require a fuzzy e-mail or phone match
use $temp/temp2r, clear
* dup = number of OTHER list rows sharing the same name (0 = unique)
duplicates tag mentor_namer, g(dup)

* Names that are not unique cannot be the 1-side of the merge: set them aside.
* dup>=1 (not dup==1) so names occurring three or more times are set aside too
* rather than being dropped from both files.
preserve 
keep if dup>=1
drop dup
save $temp/temp2rd, replace 
restore 

keep if dup==0
rename mentor_namer mentor_names 
merge 1:m mentor_names using $temp/temp2s
* matchit (user-written command) stores similarity scores for phone and e-mail
matchit mentor_phoner mentor_phones, g(phonescore)
matchit mentor_emailr mentor_emails, g(emailscore)

* matched data: same name and similar e-mail or phone
preserve 
keep if _merge==3 & (emailscore>0.5 | phonescore>0.8) 
drop _merge *score 
save $temp/temp3m, replace 
restore 

* unmatched data, list side. The rejection rule is written as the exact negation
* of the acceptance rule so that a pair with a score of exactly 0.5 / 0.8 lands
* in the unmatched files instead of disappearing from all outputs.
preserve 
keep if _merge==1 | (_merge==3 & !(emailscore>0.5 | phonescore>0.8))
drop _merge 
rename mentor_names mentor_namer
* "*r" keeps the list-side variables; it also catches mentor_byear, which the
* later "drop mentor_byear" relies on
keep *r mentorid 
save $temp/temp3r, replace 
restore 

* unmatched data, survey side
preserve 
keep if _merge==2 | (_merge==3 & !(emailscore>0.5 | phonescore>0.8))
drop _merge 
keep mentor_names mentor_emails responseid mentor_phones mentor_bday mentor_bmonth mentor_byear mentor_ttis 
save $temp/temp3s, replace 
restore

// Stack the matches from the three passes into one mentorid-responseid crosswalk
clear
foreach matched in temp1m temp2m temp3m {
	append using $temp/`matched'
}
keep mentorid responseid
* Report whether mentorid is unique (unique is a user-written command)
unique mentorid

save $temp/g12_uniqueid_mentor, replace

// Could not match survey submission of 40 mentors (!!!)
* Manually check and obtain gender information from the application package

* Remaining list-side mentors: leftovers of pass 3 plus the non-unique names
use $temp/temp3r, clear
append using $temp/temp2rd
sort mentor_namer

* Hand-assigned survey responses, given as pairs "mentorid responseid"
generate responseid = .
tokenize MTKS47 345 MTTY12 202 MTKS06 391 MTKS26 141
while "`1'" != "" {
	replace responseid = `2' if mentorid == "`1'"
	macro shift 2
}

* Save the hand-matched pairs as a fourth crosswalk
preserve
keep if !missing(responseid)
keep mentorid responseid
save $temp/temp4m, replace
restore

* Mentors that are still unmatched: export a sheet with empty columns to be filled in by hand
keep if missing(responseid)
* mentor_byear is discarded here and re-created empty just below
drop mentor_byear
capture generate mentor_sex = .
capture generate mentor_bday = .
capture generate mentor_bmonth = ""
capture generate mentor_byear = .
capture generate mentor_school = ""
capture generate responseid = .
export excel using $temp/TVET_mentors_chosen_unmatched.xls, replace firstrow(varlabels)
save $temp/temp4r, replace

* Survey submissions that no pass could link to a mentor on the list
use $temp/temp3s, clear
* Remove the responses that were linked by hand (345, 202, 391, 141).
* 141 is assigned to MTKS26 together with the other three manual matches but
* was missing from this list, so it was still exported as "unmatched".
cap drop if inlist(responseid, 345, 202, 391, 141) 
lab val mentor_ttis mentor_ttis

export excel using $temp/TVET_mentors_survey_unmatched.xls, replace firstrow(varlabels)
save $temp/temp4s, replace 


// Incorporate data from Cheku
* Read the manually completed sheet for the mentors that stayed unmatched
import excel using $raw/manual/TVET_mentors_chosen_unmatched.xls, clear firstrow

* Gender arrives as text: "Male" -> 1, "Female" -> 0, anything else -> missing
rename mentor_sex sex
generate mentor_sex = cond(sex == "Male", 1, cond(sex == "Female", 0, .))

* Birth month arrives as a number 1-12; store it as the English month name
rename mentor_bmonth bmonth
generate mentor_bmonth = "January" if bmonth == 1
local month_no = 1
foreach month_name in February March April May June July August September October November December {
	local ++month_no
	replace mentor_bmonth = "`month_name'" if bmonth == `month_no'
}

keep mentorid mentor_name mentor_email mentor_sex mentor_bday mentor_bmonth mentor_byear mentor_school
save $temp/temp5m, replace

// Stack the four crosswalks (three automatic passes + manual matches)
clear
forvalues pass = 1/4 {
	append using $temp/temp`pass'm.dta
	* step records the pass a row came from (it is not kept below)
	capture generate step = `pass'
	replace step = `pass' if missing(step)
}
keep responseid mentorid

* Attach the cleaned survey variables; only rows found in both files are kept
merge 1:1 responseid using $temp/temp, keep(matched) nogen

* Add the mentors whose details were entered manually
append using $temp/temp5m
* Final variable names, derived variables and labels
rename mentor_sex mentor_male

* Age relative to the hard-coded year 2021
gen mentor_age = 2021 - mentor_byear

* Label text corrected: "slightly" (was "slighly")
lab def sat 1 "not at all" 2 "slightly" 3 "moderately" 4 "very" 5 "extremely" 99 "decline to answer"
lab val mentor_beenmentee_sat sat

lab def experience 1 "none" 2 "1-2 persons" 3 "3-5 persons" 4 "6-10 persons" 5 "more than 10" 99 "don't know"
lab val mentor_exp experience

lab def recommendation 1 "strongly not" 2 "not" 3 "neutral" 4 "recommend" 5 "strongly recommend"
lab val mentor_progrec recommendation

lab var mentor_age "Age" 
lab var mentor_male "Male" 
lab var b5extro "Big 5 Extroversion" 
* Label text corrected: "Agreeableness" (was "Agreeness")
lab var b5agree "Big 5 Agreeableness" 
lab var b5consci "Big 5 Conscientiousness"
lab var b5neuro "Big 5 Neuroticism"
lab var b5open "Big 5 Openness"

* Treat the code 99 as missing in these two variables
recode mentor_graduation mentor_level (99=.)


save "$clean/TVET_mentor.dta", replace




